Library
rm(DmIALite, DOLite, DOLiteTerm, HsIALite, MmIALite, RnIALite)
## Warning in rm(DmIALite, DOLite, DOLiteTerm, HsIALite, MmIALite, RnIALite):
## objet 'DmIALite' introuvable
## Warning in rm(DmIALite, DOLite, DOLiteTerm, HsIALite, MmIALite, RnIALite):
## objet 'DOLite' introuvable
## Warning in rm(DmIALite, DOLite, DOLiteTerm, HsIALite, MmIALite, RnIALite):
## objet 'DOLiteTerm' introuvable
## Warning in rm(DmIALite, DOLite, DOLiteTerm, HsIALite, MmIALite, RnIALite):
## objet 'HsIALite' introuvable
## Warning in rm(DmIALite, DOLite, DOLiteTerm, HsIALite, MmIALite, RnIALite):
## objet 'MmIALite' introuvable
## Warning in rm(DmIALite, DOLite, DOLiteTerm, HsIALite, MmIALite, RnIALite):
## objet 'RnIALite' introuvable
library(dplyr)
##
## Attachement du package : 'dplyr'
## Les objets suivants sont masqués depuis 'package:stats':
##
## filter, lag
## Les objets suivants sont masqués depuis 'package:base':
##
## intersect, setdiff, setequal, union
library(FactoMineR)
library(factoextra)
## Le chargement a nécessité le package : ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(sva)
## Le chargement a nécessité le package : mgcv
## Le chargement a nécessité le package : nlme
##
## Attachement du package : 'nlme'
## L'objet suivant est masqué depuis 'package:dplyr':
##
## collapse
## This is mgcv 1.9-0. For overview type 'help("mgcv-package")'.
## Le chargement a nécessité le package : genefilter
## Le chargement a nécessité le package : BiocParallel
library(xlsx)
library(clusterProfiler)
##
## Registered S3 methods overwritten by 'treeio':
## method from
## MRCA.phylo tidytree
## MRCA.treedata tidytree
## Nnode.treedata tidytree
## Ntip.treedata tidytree
## ancestor.phylo tidytree
## ancestor.treedata tidytree
## child.phylo tidytree
## child.treedata tidytree
## full_join.phylo tidytree
## full_join.treedata tidytree
## groupClade.phylo tidytree
## groupClade.treedata tidytree
## groupOTU.phylo tidytree
## groupOTU.treedata tidytree
## inner_join.phylo tidytree
## inner_join.treedata tidytree
## is.rooted.treedata tidytree
## nodeid.phylo tidytree
## nodeid.treedata tidytree
## nodelab.phylo tidytree
## nodelab.treedata tidytree
## offspring.phylo tidytree
## offspring.treedata tidytree
## parent.phylo tidytree
## parent.treedata tidytree
## root.treedata tidytree
## rootnode.phylo tidytree
## sibling.phylo tidytree
## clusterProfiler v4.8.2 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
##
## Attachement du package : 'clusterProfiler'
## L'objet suivant est masqué depuis 'package:stats':
##
## filter
library(pheatmap)
library(rdist)
library(DEP)
library(SummarizedExperiment)
## Le chargement a nécessité le package : MatrixGenerics
## Le chargement a nécessité le package : matrixStats
##
## Attachement du package : 'matrixStats'
## Les objets suivants sont masqués depuis 'package:genefilter':
##
## rowSds, rowVars
## L'objet suivant est masqué depuis 'package:dplyr':
##
## count
##
## Attachement du package : 'MatrixGenerics'
## Les objets suivants sont masqués depuis 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Les objets suivants sont masqués depuis 'package:genefilter':
##
## rowSds, rowVars
## Le chargement a nécessité le package : GenomicRanges
## Le chargement a nécessité le package : stats4
## Le chargement a nécessité le package : BiocGenerics
##
## Attachement du package : 'BiocGenerics'
## Les objets suivants sont masqués depuis 'package:dplyr':
##
## combine, intersect, setdiff, union
## Les objets suivants sont masqués depuis 'package:stats':
##
## IQR, mad, sd, var, xtabs
## Les objets suivants sont masqués depuis 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Le chargement a nécessité le package : S4Vectors
##
## Attachement du package : 'S4Vectors'
## L'objet suivant est masqué depuis 'package:clusterProfiler':
##
## rename
## Les objets suivants sont masqués depuis 'package:dplyr':
##
## first, rename
## L'objet suivant est masqué depuis 'package:utils':
##
## findMatches
## Les objets suivants sont masqués depuis 'package:base':
##
## expand.grid, I, unname
## Le chargement a nécessité le package : IRanges
##
## Attachement du package : 'IRanges'
## L'objet suivant est masqué depuis 'package:clusterProfiler':
##
## slice
## L'objet suivant est masqué depuis 'package:nlme':
##
## collapse
## Les objets suivants sont masqués depuis 'package:dplyr':
##
## collapse, desc, slice
## Le chargement a nécessité le package : GenomeInfoDb
## Le chargement a nécessité le package : Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attachement du package : 'Biobase'
## L'objet suivant est masqué depuis 'package:MatrixGenerics':
##
## rowMedians
## Les objets suivants sont masqués depuis 'package:matrixStats':
##
## anyMissing, rowMedians
library(org.Hs.eg.db)
## Le chargement a nécessité le package : AnnotationDbi
##
## Attachement du package : 'AnnotationDbi'
## L'objet suivant est masqué depuis 'package:clusterProfiler':
##
## select
## L'objet suivant est masqué depuis 'package:dplyr':
##
## select
##
library(pathview)
## ##############################################################################
## Pathview is an open source software package distributed under GNU General
## Public License version 3 (GPLv3). Details of GPLv3 is available at
## http://www.gnu.org/licenses/gpl-3.0.html. Particullary, users are required to
## formally cite the original Pathview paper (not just mention it) in publications
## or products. For details, do citation("pathview") within R.
##
## The pathview downloads and uses KEGG data. Non-academic uses may require a KEGG
## license agreement (details at http://www.kegg.jp/kegg/legal.html).
## ##############################################################################
library(enrichplot)
library(DOSE)
## DOSE v3.26.1 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
library(biomaRt)
deal_with_NA <- function(df, column, value_to_change, final_value){
for (i in column){
print(i)
data.table::set(df,which(df[[i]] == value_to_change), i, final_value)
}
}
"%ni%" <- Negate("%in%")
Preparing analysis
Making sub-data
data <- Discovery_Cohort_Proteomic_unimputed
data$PG.Genes %>% duplicated() %>% any()
## [1] TRUE
data %>% group_by(PG.Genes) %>% summarize(frequency = n()) %>% arrange(desc(frequency)) %>% filter(frequency > 1)
## # A tibble: 1 × 2
## PG.Genes frequency
## <chr> <int>
## 1 "" 14
data_unique <- make_unique(data, "PG.Genes", "PG.UniProtIds", delim = ";")
data_unique_log_reverted <- sapply(data_unique[,1:177], function(x){
sapply(x, function(y){
exp(y)
})
})
IDHwt_samples <- dplyr::filter(Pheno, pheno == "Group_control") %>% .$Patient_ID
# IDH1
sampling_IDHwt_IDH1 <- (Pheno$pheno == "IDH1") %>% table %>% .[2] %>% sample(IDHwt_samples, ., replace = F)
data_unique_log_reverted_IDH1_IDHwt <- data_unique_log_reverted[,(Pheno$pheno == "IDH1" | Pheno$Patient_ID %in% sampling_IDHwt_IDH1)]
data_unique_log_reverted_IDH1_IDHwt <- cbind(data_unique_log_reverted_IDH1_IDHwt, data_unique[,178:183])
Pheno_IDH1_wt <- Pheno[Pheno$pheno == "IDH1" | Pheno$Patient_ID %in% sampling_IDHwt_IDH1,]
Pheno_IDH1_wt$pheno <- sapply(Pheno_IDH1_wt$pheno, function(sample){
ifelse(stringr::str_detect(sample, "IDH1"), "mIDH1", "wtIDH")
})
pheno_se <- data.frame("label" = colnames(data)[1:177], condition = Pheno$pheno, replicate = 1:177)
pheno_se_idh1_wt <- pheno_se[pheno_se$condition == "IDH1" | pheno_se$label %in% sampling_IDHwt_IDH1,]
data_se_idh1_wt <- make_se(data_unique_log_reverted_IDH1_IDHwt, 1:nrow(Pheno_IDH1_wt), pheno_se_idh1_wt)
data_se_idh1_wt
## class: SummarizedExperiment
## dim: 6536 28
## metadata(0):
## assays(1): ''
## rownames(6536): A1BG A2M ... Q6ZSR9 R4GMS1
## rowData names(6): PG.ProteinAccessions PG.Genes ... name ID
## colnames(28): Group_control_4 Group_control_7 ... IDH1_160
## Group_control_176
## colData names(4): label ID condition replicate
plot_frequency(data_se_idh1_wt)

# IDH2
sampling_IDHwt_IDH2 <- (Pheno$pheno == "IDH2") %>% table %>% .[2] %>% sample(IDHwt_samples, ., replace = F)
data_unique_log_reverted_IDH2_IDHwt <- data_unique_log_reverted[,(Pheno$pheno == "IDH2" | Pheno$Patient_ID %in% sampling_IDHwt_IDH2)]
data_unique_log_reverted_IDH2_IDHwt <- cbind(data_unique_log_reverted_IDH2_IDHwt, data_unique[,178:183])
Pheno_IDH2_wt <- Pheno[Pheno$pheno == "IDH2" | Pheno$Patient_ID %in% sampling_IDHwt_IDH2,]
Pheno_IDH2_wt$pheno <- sapply(Pheno_IDH2_wt$pheno, function(sample){
ifelse(stringr::str_detect(sample, "IDH2"), "mIDH2", "wtIDH")
})
pheno_se_idh2_wt <- pheno_se[pheno_se$condition == "IDH2" | pheno_se$label %in% sampling_IDHwt_IDH2,]
data_se_idh2_wt <- make_se(data_unique_log_reverted_IDH2_IDHwt, 1:nrow(Pheno_IDH2_wt), pheno_se_idh2_wt)
data_se_idh2_wt
## class: SummarizedExperiment
## dim: 6536 48
## metadata(0):
## assays(1): ''
## rownames(6536): A1BG A2M ... Q6ZSR9 R4GMS1
## rowData names(6): PG.ProteinAccessions PG.Genes ... name ID
## colnames(48): IDH2_2 Group_control_3 ... Group_control_170 IDH2_175
## colData names(4): label ID condition replicate
plot_frequency(data_se_idh2_wt)

# IDH1 vs IDH2
data_unique_log_reverted_IDH1_IDH2 <- data_unique_log_reverted[,Pheno$pheno == "IDH1" | Pheno$pheno == "IDH2"]
data_unique_log_reverted_IDH1_IDH2 <- cbind(data_unique_log_reverted_IDH1_IDH2, data_unique[,178:183])
Pheno_IDH <- Pheno[Pheno$pheno == "IDH1" | Pheno$pheno == "IDH2",]
pheno_se_IDH <- pheno_se[pheno_se$condition == "IDH1" | pheno_se$condition == "IDH2",]
data_se_IDH <- make_se(data_unique_log_reverted_IDH1_IDH2, 1:nrow(Pheno_IDH), pheno_se_IDH)
data_se_IDH
## class: SummarizedExperiment
## dim: 6536 38
## metadata(0):
## assays(1): ''
## rownames(6536): A1BG A2M ... Q6ZSR9 R4GMS1
## rowData names(6): PG.ProteinAccessions PG.Genes ... name ID
## colnames(38): IDH2_2 IDH1_13 ... IDH2_163 IDH2_175
## colData names(4): label ID condition replicate
plot_frequency(data_se_IDH)

Looking at missing values
data_filt_IDH1 <- filter_missval(data_se_idh1_wt, thr = 0)
data_filt_IDH2 <- filter_missval(data_se_idh2_wt, thr = 0)
data_filt_IDH <- filter_missval(data_se_IDH, thr = 0)
plot_numbers(data_filt_IDH1)

plot_numbers(data_filt_IDH2)

plot_numbers(data_filt_IDH)

plot_coverage(data_filt_IDH1)

plot_coverage(data_filt_IDH2)

plot_coverage(data_filt_IDH)

Normalisation
for (i in colnames(data_filt_IDH1@assays@data@listData[[1]])){
data_filt_IDH1@assays@data@listData[[1]][,i][is.nan(data_filt_IDH1@assays@data@listData[[1]][,i])]<-NA
}
for (i in colnames(data_filt_IDH2@assays@data@listData[[1]])){
data_filt_IDH2@assays@data@listData[[1]][,i][is.nan(data_filt_IDH2@assays@data@listData[[1]][,i])]<-NA
}
for (i in colnames(data_filt_IDH@assays@data@listData[[1]])){
data_filt_IDH@assays@data@listData[[1]][,i][is.nan(data_filt_IDH@assays@data@listData[[1]][,i])]<-NA
}
data_norm_IDH1 <- normalize_vsn(data_filt_IDH1)
data_norm_IDH2 <- normalize_vsn(data_filt_IDH2)
data_norm_IDH <- normalize_vsn(data_filt_IDH)
plot_normalization(data_filt_IDH1[,1:5], data_norm_IDH1[,1:5])

plot_normalization(data_filt_IDH2[,1:5], data_norm_IDH2[,1:5])

plot_normalization(data_filt_IDH[,1:5], data_norm_IDH[,1:5])

plot_missval(data_filt_IDH1[, 1:10])

plot_missval(data_filt_IDH2[, 1:10])

plot_missval(data_filt_IDH[, 1:10])

plot_detect(data_filt_IDH1)

plot_detect(data_filt_IDH2)

plot_detect(data_filt_IDH)

Imputation of missing values
data_imp_IDH1 <- impute(data_norm_IDH1, fun = "MinProb", q = 0.01)
## Imputing along margin 2 (samples/columns).
## [1] 0.9486871
data_imp_IDH2 <- impute(data_norm_IDH2, fun = "MinProb", q = 0.01)
## Imputing along margin 2 (samples/columns).
## [1] 0.9053707
data_imp_IDH <- impute(data_norm_IDH, fun = "MinProb", q = 0.01)
## Imputing along margin 2 (samples/columns).
## [1] 0.8914037
data_imp_IDH1@assays@data@listData[[1]] %>%
write.table("~/GitHub/Thesis_paper/Datasets/Proteomic/mIDH1_data.tsv",
sep = "\t", row.names = T, col.names = NA, quote = F)
data_imp_IDH2@assays@data@listData[[1]] %>%
write.table("~/GitHub/Thesis_paper/Datasets/Proteomic/mIDH2_data.tsv",
sep = "\t", row.names = T, col.names = NA, quote = F)
data_imp_IDH@assays@data@listData[[1]] %>%
write.table("~/GitHub/Thesis_paper/Datasets/Proteomic/mIDH_data.tsv",
sep = "\t", row.names = T, col.names = NA, quote = F)
plot_imputation(data_norm_IDH1, data_imp_IDH1)

plot_imputation(data_norm_IDH2, data_imp_IDH2)

plot_imputation(data_norm_IDH, data_imp_IDH)

Comparison of pheno analysis
DEG
data_diff_IDH1 <- test_diff(data_imp_IDH1, type = "control", control = "Group_control")
## Tested contrasts: IDH1_vs_Group_control
data_diff_IDH2 <- test_diff(data_imp_IDH2, type = "control", control = "Group_control")
## Tested contrasts: IDH2_vs_Group_control
data_diff_IDH <- test_diff(data_imp_IDH, type = "control", control = "IDH1")
## Tested contrasts: IDH2_vs_IDH1
dep_IDH1 <- add_rejections(data_diff_IDH1, alpha = 0.05, lfc = 0.5)
dep_IDH2 <- add_rejections(data_diff_IDH2, alpha = 0.05, lfc = 0.5)
dep_IDH <- add_rejections(data_diff_IDH, alpha = 0.05, lfc = 0.5)
PCA
plot_pca(dep_IDH1, n = 500, point_size = 4, label = F, indicate = "condition")
## Warning: Use of `pca_df[[indicate[1]]]` is discouraged.
## ℹ Use `.data[[indicate[1]]]` instead.

plot_pca(dep_IDH2, n = 500, point_size = 4, label = F, indicate = "condition")
## Warning: Use of `pca_df[[indicate[1]]]` is discouraged.
## ℹ Use `.data[[indicate[1]]]` instead.

plot_pca(dep_IDH, n = 500, point_size = 4, label = F, indicate = "condition")
## Warning: Use of `pca_df[[indicate[1]]]` is discouraged.
## ℹ Use `.data[[indicate[1]]]` instead.

Heatmap
plot_cor(dep_IDH1, significant = FALSE, lower = 0.75, upper = 1, pal = "Reds")

plot_cor(dep_IDH2, significant = FALSE, lower = 0.75, upper = 1, pal = "Reds")

plot_cor(dep_IDH, significant = FALSE, lower = 0.75, upper = 1, pal = "Reds")

plot_heatmap(dep_IDH1, type = "centered", kmeans = FALSE,
k = 2, show_row_names = FALSE,
indicate = "condition")

plot_heatmap(dep_IDH2, type = "centered", kmeans = FALSE,
k = 2, show_row_names = FALSE,
indicate = "condition")

plot_heatmap(dep_IDH, type = "centered", kmeans = FALSE,
k = 2, show_row_names = FALSE,
indicate = "condition")

Volcano
plot_volcano(dep_IDH1, contrast = "IDH1_vs_Group_control", label_size = 5, add_names = TRUE, adjusted = F)

plot_volcano(dep_IDH2, contrast = "IDH2_vs_Group_control", label_size = 5, add_names = TRUE, adjusted = F)

plot_volcano(dep_IDH, contrast = "IDH2_vs_IDH1", label_size = 5, add_names = TRUE, adjusted = F)

plot_single(dep_IDH1, proteins = "CEBPA", type = "centered") + theme(legend.position = "none")

#plot_single(dep_IDH2, proteins = "CEBPZ", type = "centered") + theme(legend.position = "none")
plot_single(dep_IDH, proteins = "CEBPA", type = "centered") + theme(legend.position = "none")

plot_single(dep_IDH1, proteins = "HDGFRP2", type = "centered") + theme(legend.position = "none")

plot_single(dep_IDH2, proteins = "HDGFRP2", type = "centered") + theme(legend.position = "none")

plot_single(dep_IDH, proteins = "HDGFRP2", type = "centered") + theme(legend.position = "none")

df_long_IDH1 <- get_df_long(dep_IDH1)
df_long_IDH2 <- get_df_long(dep_IDH2)
df_long_IDH <- get_df_long(dep_IDH)
Saving DEG
Diff_Prot_exp_IDH1 <- dep_IDH1@elementMetadata@listData %>%
as.data.frame() %>%
dplyr::select(c("name", "IDH1_vs_Group_control_diff", "IDH1_vs_Group_control_p.val"))
Diff_Prot_exp_IDH2 <- dep_IDH2@elementMetadata@listData %>%
as.data.frame() %>%
dplyr::select(c("name", "IDH2_vs_Group_control_diff", "IDH2_vs_Group_control_p.val"))
Diff_Prot_exp_IDH <- dep_IDH@elementMetadata@listData %>%
as.data.frame() %>%
dplyr::select(c("name", "IDH2_vs_IDH1_diff", "IDH2_vs_IDH1_p.val"))
write.table(Diff_Prot_exp_IDH1, "~/GitHub/Thesis_paper/Results/Proteo/Diff_Prot_exp_IDH1_IDHwt.tsv", quote = F, row.names = F, sep = "\t")
Diff_Prot_exp_IDH1_sig <- Diff_Prot_exp_IDH1 %>%
dplyr::filter(IDH1_vs_Group_control_p.val < 0.05 & abs(IDH1_vs_Group_control_diff) > 0.5)
Diff_Prot_exp_IDH1_sig %>%
write.table("~/GitHub/Thesis_paper/Results/Proteo/Diff_sig_Prot_exp_IDH1_IDHwt.tsv", quote = F, row.names = F, sep = "\t")
write.table(Diff_Prot_exp_IDH2, "~/GitHub/Thesis_paper/Results/Proteo/Diff_Prot_exp_IDH2_IDHwt.tsv", quote = F, row.names = F, sep = "\t")
Diff_Prot_exp_IDH2_sig <- Diff_Prot_exp_IDH2 %>%
dplyr::filter(IDH2_vs_Group_control_p.val < 0.05 & abs(IDH2_vs_Group_control_diff) > 0.5)
Diff_Prot_exp_IDH2_sig %>%
write.table("~/GitHub/Thesis_paper/Results/Proteo/Diff_sig_Prot_exp_IDH2_IDHwt.tsv", quote = F, row.names = F, sep = "\t")
write.table(Diff_Prot_exp_IDH, "~/GitHub/Thesis_paper/Results/Proteo/Diff_Prot_exp_IDH2_IDH1.tsv", quote = F, row.names = F, sep = "\t")
Diff_Prot_exp_IDH_sig <- Diff_Prot_exp_IDH %>%
dplyr::filter(IDH2_vs_IDH1_p.val < 0.05 & abs(IDH2_vs_IDH1_diff) > 0.5)
Diff_Prot_exp_IDH_sig %>%
write.table("~/GitHub/Thesis_paper/Results/Proteo/Diff_sig_Prot_exp_IDH2_IDH1.tsv", quote = F, row.names = F, sep = "\t")
Diff_Prot_exp_IDH1 %>%
dplyr::filter(IDH1_vs_Group_control_p.val < 0.05 & abs(IDH1_vs_Group_control_diff) > 0.5) %>%
.$IDH1_vs_Group_control_diff %>% hist(breaks = 20)

Diff_Prot_exp_IDH2 %>%
dplyr::filter(IDH2_vs_Group_control_p.val < 0.05 & abs(IDH2_vs_Group_control_diff) > 0.5) %>%
.$IDH2_vs_Group_control_diff %>% hist(breaks = 20)

Diff_Prot_exp_IDH %>%
dplyr::filter(IDH2_vs_IDH1_p.val < 0.05 & abs(IDH2_vs_IDH1_diff) > 0.5) %>%
.$IDH2_vs_IDH1_diff %>% hist(breaks = 20)

GO KEGG WP Enrichments
Functions
GO_KEGG_WP_MKEGG <- function(List_positive, List_negative){
list_enrichments <- list()
list_enrichments[["GO_positive"]] <- enrichGO(List_positive, OrgDb = org.Hs.eg.db, universe = GO_universe, keyType = "SYMBOL", pvalueCutoff = 0.2)
list_enrichments[["GO_negative"]] <- enrichGO(List_negative, OrgDb = org.Hs.eg.db, universe = GO_universe, keyType = "SYMBOL", pvalueCutoff = 0.2)
entrez_id_positive <- dplyr::filter(genes, hgnc_symbol %in% List_positive) %>%
.$entrezgene_id %>% unique
entrez_id_negative <- dplyr::filter(genes, hgnc_symbol %in% List_negative) %>%
.$entrezgene_id %>% unique
universe_entrez <- dplyr::filter(genes, hgnc_symbol %in% GO_universe) %>%
.$entrezgene_id %>% unique
list_enrichments[["KEGG_positive"]] <- enrichKEGG(entrez_id_positive, organism = "hsa",
keyType = 'ncbi-geneid', pvalueCutoff = 0.1,
pAdjustMethod = "none", universe = universe_entrez)
list_enrichments[["KEGG_negative"]] <- enrichKEGG(entrez_id_negative, organism = "hsa",
keyType = 'ncbi-geneid', pvalueCutoff = 0.1,
pAdjustMethod = "none", universe = universe_entrez)
list_enrichments[["MKEGG_positive"]] <- enrichMKEGG(entrez_id_positive, organism = "hsa",
keyType = 'ncbi-geneid', pvalueCutoff = 0.1,
pAdjustMethod = "none", universe = universe_entrez)
list_enrichments[["MKEGG_negative"]] <- enrichMKEGG(entrez_id_negative, organism = "hsa",
keyType = 'ncbi-geneid', pvalueCutoff = 0.1,
pAdjustMethod = "none", universe = universe_entrez)
list_enrichments[["WP_positive"]] <- enrichWP(entrez_id_positive, organism = "Homo sapiens",
universe = universe_entrez)
list_enrichments[["WP_negative"]] <- enrichWP(entrez_id_negative, organism = "Homo sapiens",
universe = universe_entrez)
res <- lapply(names(list_enrichments), function(onto){
list_enrichments[[onto]]@result$qvalue <- list_enrichments[[onto]]@result$pvalue
list_enrichments[[onto]]@result$p.adjust <- list_enrichments[[onto]]@result$pvalue
list_enrichments[[onto]]
})
names(res) <- names(list_enrichments)
return(res)
}
Make_enrich_plot <- function(enrichment, path){
n_enrich <- nrow(dplyr::filter(enrichment@result, pvalue < 0.1))
if(n_enrich == 0){
return(NULL)
}
height_plot <- 200 + (n_enrich *120)
p <- dotplot(enrichment, showCategory = n_enrich)
ggsave(path, p, bg = "white", width = 3800, height = height_plot, units = "px", limitsize = FALSE)
p
}
mart <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL",
dataset = "hsapiens_gene_ensembl")
GO_universe <- Diff_Prot_exp_IDH1$name %>% unique
genes <- getBM(filters = "hgnc_symbol",
attributes = c("ensembl_gene_id","entrezgene_id", "hgnc_symbol"),
values = GO_universe,
mart = mart)
Diff prot
hs <- org.Hs.eg.db
DEProts_IDH1_up <- Diff_Prot_exp_IDH1 %>%
dplyr::filter(IDH1_vs_Group_control_p.val < 0.05 & IDH1_vs_Group_control_diff > 0.5) %>%
.$name
DEProts_IDH2_up <- Diff_Prot_exp_IDH2 %>%
dplyr::filter(IDH2_vs_Group_control_p.val < 0.05 & IDH2_vs_Group_control_diff > 0.5) %>%
.$name
DEProts_IDH_up <- Diff_Prot_exp_IDH %>%
dplyr::filter(IDH2_vs_IDH1_p.val < 0.05 & IDH2_vs_IDH1_diff > 0.5) %>%
.$name
DEProts_IDH1_down <- Diff_Prot_exp_IDH1 %>%
dplyr::filter(IDH1_vs_Group_control_p.val < 0.05 & IDH1_vs_Group_control_diff < -0.5) %>%
.$name
DEProts_IDH2_down <- Diff_Prot_exp_IDH2 %>%
dplyr::filter(IDH2_vs_Group_control_p.val < 0.05 & IDH2_vs_Group_control_diff < -0.5) %>%
.$name
DEProts_IDH_down <- Diff_Prot_exp_IDH %>%
dplyr::filter(IDH2_vs_IDH1_p.val < 0.05 & IDH2_vs_IDH1_diff < -0.5) %>%
.$name
GO KEGG WP
IDH1_GO_KEGG_WP_vs_Control <- GO_KEGG_WP_MKEGG(DEProts_IDH1_up, DEProts_IDH1_down)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
## Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/hsa"...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/hsa"...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
IDH2_GO_KEGG_WP_vs_Control <- GO_KEGG_WP_MKEGG(DEProts_IDH2_up, DEProts_IDH2_down)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
## Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/hsa"...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/hsa"...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
IDH2_GO_KEGG_WP_vs_IDH1 <- GO_KEGG_WP_MKEGG(DEProts_IDH_up, DEProts_IDH_down)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
## Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/hsa"...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/hsa"...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
## `universe` is not in character and will be ignored...
Saving plots
lapply(names(IDH1_GO_KEGG_WP_vs_Control), function(enrichment){
Make_enrich_plot(IDH1_GO_KEGG_WP_vs_Control[[enrichment]], paste0("~/GitHub/Thesis_paper/Results/Proteo/Figures/IDH1_vs_IDHwt_", enrichment, ".png"))
})
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]
## NULL
##
## [[6]]

##
## [[7]]

##
## [[8]]

lapply(names(IDH2_GO_KEGG_WP_vs_Control), function(enrichment){
Make_enrich_plot(IDH2_GO_KEGG_WP_vs_Control[[enrichment]], paste0("~/GitHub/Thesis_paper/Results/Proteo/Figures/IDH2_vs_IDHwt_", enrichment, ".png"))
})
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

lapply(names(IDH2_GO_KEGG_WP_vs_IDH1), function(enrichment){
Make_enrich_plot(IDH2_GO_KEGG_WP_vs_IDH1[[enrichment]], paste0("~/GitHub/Thesis_paper/Results/Proteo/Figures/IDH1_vs_IDH2_", enrichment, ".png"))
})
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

Saving tables
lapply(names(IDH1_GO_KEGG_WP_vs_Control), function(enrichment){
IDH1_GO_KEGG_WP_vs_Control[[enrichment]]@result %>% .[c(1:5, 8:9)] %>%
write.table(paste0("~/GitHub/Thesis_paper/Results/Proteo/Tables/Enrichments/IDH1_vs_Control_", enrichment, ".tsv"), sep = "\t", quote=F, col.names = NA)
})
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
lapply(names(IDH2_GO_KEGG_WP_vs_Control), function(enrichment){
IDH2_GO_KEGG_WP_vs_Control[[enrichment]]@result %>% .[c(1:5, 8:9)] %>%
write.table(paste0("~/GitHub/Thesis_paper/Results/Proteo/Tables/Enrichments/IDH2_vs_Control_", enrichment, ".tsv"), sep = "\t", quote=F, col.names = NA)
})
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
lapply(names(IDH2_GO_KEGG_WP_vs_IDH1), function(enrichment){
IDH2_GO_KEGG_WP_vs_IDH1[[enrichment]]@result %>% .[c(1:5, 8:9)] %>%
write.table(paste0("~/GitHub/Thesis_paper/Results/Proteo/Tables/Enrichments/IDH2_vs_IDH1_", enrichment, ".tsv"), sep = "\t", quote=F, col.names = NA)
})
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
GSEA analysis
Functions
Do_GSEA_analysis <- function(Diff_prot_data){
diff_prot <- Diff_prot_data[3] < 0.05
deprots <- Diff_prot_data[3][diff_prot]
names(deprots) <- Diff_prot_data$name[diff_prot]
deprots <- sort(deprots, decreasing = TRUE)
deprot_entrez <- deprots[names(deprots) %in% genes$hgnc_symbol]
names(deprot_entrez) <- sapply(names(deprot_entrez), function(hgnc_name){
dplyr::filter(genes, hgnc_symbol == hgnc_name) %>% .$entrezgene_id %>% unlist %>% unique
})
list_enrichments <- list()
list_enrichments[["KEGG"]] <- gseKEGG(deprot_entrez, minGSSize = 2, pvalueCutoff = 0.1, pAdjustMethod = "none", verbose = T)
list_enrichments[["MKEGG"]] <- gseMKEGG(deprot_entrez, minGSSize = 2, pvalueCutoff = 0.1, pAdjustMethod = "none", verbose = T)
list_enrichments[["WP"]] <- gseWP(deprot_entrez, organism = "Homo sapiens", minGSSize = 2, pvalueCutoff = 0.1, pAdjustMethod = "none", verbose = T)
res <- lapply(names(list_enrichments), function(onto){
list_enrichments[[onto]]@result$qvalue <- list_enrichments[[onto]]@result$pvalue
list_enrichments[[onto]]@result$p.adjust <- list_enrichments[[onto]]@result$pvalue
list_enrichments[[onto]]
})
names(res) <- names(list_enrichments)
res
}
KEGG MKEGG WP GSEA
IDH1_wt_GSEA <- Do_GSEA_analysis(Diff_Prot_exp_IDH1)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## leading edge analysis...
## done...
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/module"...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## no term enriched under specific pvalueCutoff...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## leading edge analysis...
## done...
IDH2_wt_GSEA <- Do_GSEA_analysis(Diff_Prot_exp_IDH2)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## leading edge analysis...
## done...
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/module"...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## no term enriched under specific pvalueCutoff...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## leading edge analysis...
## done...
IDH2_IDH1_GSEA <- Do_GSEA_analysis(Diff_Prot_exp_IDH)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## leading edge analysis...
## done...
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/module"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/module"...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## no term enriched under specific pvalueCutoff...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".
## leading edge analysis...
## done...
Saving plots
lapply(names(IDH1_wt_GSEA), function(enrichment){
Make_enrich_plot(IDH1_wt_GSEA[[enrichment]], paste0("~/GitHub/Thesis_paper/Results/Proteo/Figures/GSEA/GSEA_IDH1_vs_IDHwt_", enrichment, ".png"))
})
## [[1]]

##
## [[2]]
## NULL
##
## [[3]]

lapply(names(IDH2_wt_GSEA), function(enrichment){
Make_enrich_plot(IDH2_wt_GSEA[[enrichment]], paste0("~/GitHub/Thesis_paper/Results/Proteo/Figures/GSEA/GSEA_IDH2_vs_IDHwt_", enrichment, ".png"))
})
## [[1]]

##
## [[2]]
## NULL
##
## [[3]]

lapply(names(IDH2_IDH1_GSEA), function(enrichment){
Make_enrich_plot(IDH2_IDH1_GSEA[[enrichment]], paste0("~/GitHub/Thesis_paper/Results/Proteo/Figures/GSEA/GSEA_IDH2_vs_IDH1_", enrichment, ".png"))
})
## [[1]]

##
## [[2]]
## NULL
##
## [[3]]

Saving tables
lapply(names(IDH1_wt_GSEA), function(enrichment){
if(nrow(IDH1_wt_GSEA[[enrichment]]@result) > 0){
IDH1_wt_GSEA[[enrichment]]@result %>% .[c(1:6, 9:11)] %>%
write.table(paste0("~/GitHub/Thesis_paper/Results/Proteo/Tables/Enrichments/GSEA/IDH1_vs_Control_", enrichment, ".tsv"), sep = "\t", quote=F, col.names = NA)
}
})
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
lapply(names(IDH2_wt_GSEA), function(enrichment){
if(nrow(IDH2_wt_GSEA[[enrichment]]@result) > 0){
IDH2_wt_GSEA[[enrichment]]@result %>% .[c(1:6, 9:11)] %>%
write.table(paste0("~/GitHub/Thesis_paper/Results/Proteo/Tables/Enrichments/GSEA/IDH2_vs_Control_", enrichment, ".tsv"), sep = "\t", quote=F, col.names = NA)
}
})
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
lapply(names(IDH2_IDH1_GSEA), function(enrichment){
if(nrow(IDH2_IDH1_GSEA[[enrichment]]@result) > 0){
IDH2_IDH1_GSEA[[enrichment]]@result %>% .[c(1:6, 9:11)] %>%
write.table(paste0("~/GitHub/Thesis_paper/Results/Proteo/Tables/Enrichments/GSEA/IDH2_vs_IDH1_", enrichment, ".tsv"), sep = "\t", quote=F, col.names = NA)
}
})
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL